Setup

library(tidyverse)
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(cowplot)
all_counts_all <- readRDS(file= "Data/pc_count_matrix.rds")
avg_counts_all <- readRDS(file="Data/avg_pc_count_matrix.rds")
diff_counts <- readRDS(file="Data/diff_pc_count_matrix.rds")
meta <- suppressMessages(readr::read_tsv("Data/complete_meta_data.tsv"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="embryonic facial prominence", "facial prominence"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="skeletal muscle tissue", "muscle tissue"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="skeletal muscle tissue", "muscle tissue"))
tissue_types <- unique(meta %>% pull(tissue_type))
TFs<- suppressMessages(read_delim("mouse_tfs.tsv"))
TFs_sym <- TFs %>% pull(Symbol)
all_counts <- all_counts_all[rownames(all_counts_all) %in% (TFs %>% pull(Symbol)),]
avg_counts <- avg_counts_all[rownames(avg_counts_all) %in% (TFs %>% pull(Symbol)),]
collapsed_dev_counts <- matrix(data=NA, nrow=nrow(all_counts),ncol=length(tissue_types))
walker <- 1
for (tissue in tissue_types){
  tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
  tissue_subset_count <- all_counts[,tissue_subset_ids]
  tissue_avg <- rowMeans(tissue_subset_count)
  collapsed_dev_counts[,walker] <- tissue_avg
  walker <- walker + 1
}
colnames(collapsed_dev_counts) <- tissue_types
rownames(collapsed_dev_counts) <- rownames(all_counts)
#TFs<- c("Ascl1", "Hes1", "Neurod1", "Mecp2", "Mef2c", "Runx1", "Tcf4", "Pax6")

PCA coloured by dev_stage

##Remove zero variance 
all_counts_drop_var <- (t(all_counts))[, which(apply(t(all_counts),2,var) !=0)]
all_counts.pca <- prcomp(all_counts_drop_var, center = TRUE,scale. = TRUE)
all_counts.pca.values <- data.frame(all_counts.pca$x, dev_stage = NA, tissue_type =NA)
for (sample in base::rownames(all_counts.pca.values)){
  all_counts.pca.values[sample, "dev_stage"] <- meta %>% filter(id == sample) %>% pull(dev_stage)
  all_counts.pca.values[sample, "tissue_type"] <- meta %>% filter(id == sample) %>% pull(tissue_type)
}
library(ggfortify)
library(ggrepel)
library(RColorBrewer)

c25 <- c(
  "dodgerblue2", "#E31A1C", # red
  "green4",
  "#6A3D9A", # purple
  "#FF7F00", # orange
  "black", "gold1",
  "skyblue2", "#FB9A99", # lt pink
  "palegreen2",
  "#CAB2D6", # lt purple
  "#FDBF6F", # lt orange
  "gray70", "khaki2",
  "maroon", "orchid1", "deeppink1", "blue1", "steelblue4",
  "darkturquoise", "green1", "yellow4", "yellow3",
  "darkorange4", "brown"
)
#autoplot(all_counts.pca, data=meta, colour="dev_stage") + theme_cowplot(12) +  scale_color_manual(values = c25) 
#autoplot(all_counts.pca, data=meta %>% select(id, dev_stage), colour="dev_stage", label.size = 3, size=5) + theme_cowplot(12) +  scale_color_manual(values = c25)
ggplot(data=all_counts.pca.values, aes(x=PC1, y=PC2, colour=dev_stage)) + theme_cowplot(20) +  scale_color_manual(values = c25) + geom_point( size=5) + xlab(paste("PC1", round(all_counts.pca$sdev[1]^2/ sum(all_counts.pca$sdev^2) *100, 2), "%")) +  ylab(paste("PC2", round(all_counts.pca$sdev[2]^2/ sum(all_counts.pca$sdev^2) *100, 2), "%"))

PCA coloured by tissue

#getPalette = colorRampPalette(brewer.pal(12, "Paired"))
#autoplot(all_counts.pca, data=meta, colour="tissue_type", label.size = 3, size=5) + theme_cowplot(12) +  scale_color_manual(values = c25)
ggplot(data=all_counts.pca.values, aes(x=PC1, y=PC2, colour=tissue_type)) + theme_cowplot(20) +  scale_color_manual(values = c25) + geom_point( size=5) + xlab(paste("PC1", round(all_counts.pca$sdev[1]^2/ sum(all_counts.pca$sdev^2) *100, 2), "%")) +  ylab(paste("PC2", round(all_counts.pca$sdev[2]^2/ sum(all_counts.pca$sdev^2) *100, 2), "%"))

Pearson correlation heatmap

library(Hmisc)
all_counts_pear <- all_counts
colnames(all_counts_pear) <- sapply(colnames(all_counts), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
all_counts_pear <- all_counts_pear[,order(colnames(all_counts_pear))]
pear_rcorr_object <- rcorr(all_counts_pear, type = "pearson")
pear_matrix <- pear_rcorr_object$r
pear_matrix_pvalues <- pear_rcorr_object$P
library(corrplot)

corrplot(pear_matrix, method = 'shade', type = 'lower', diag = FALSE, col = rep(rev(brewer.pal(n=8, name="RdYlBu")),2), col.lim=c(0,1))

Pearson correlation heatmap (collapsed by samples)

library(corrplot)
library(Hmisc)
avg_counts_pear <- avg_counts
colnames(avg_counts_pear) <- sapply(colnames(avg_counts), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
avg_counts_pear <- avg_counts_pear[,order(colnames(avg_counts_pear))]
pear_rcorr_object <- rcorr(avg_counts_pear, type = "pearson")
pear_matrix <- pear_rcorr_object$r
pear_matrix_pvalues <- pear_rcorr_object$P



corrplot(pear_matrix, method="shade", type="lower", diag=FALSE, col = rep(rev(brewer.pal(n=8, name="RdYlBu")),2), col.lim=c(0,1), order="original", is.corr = FALSE)

Pearson correlation heatmap (collapsed by samples) 17 clusters (because there are 17 tissues)

library(corrplot)
library(Hmisc)
avg_counts_pear <- avg_counts
colnames(avg_counts_pear) <- sapply(colnames(avg_counts), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
pear_rcorr_object <- rcorr(avg_counts_pear, type = "pearson")
pear_matrix <- pear_rcorr_object$r
pear_matrix_pvalues <- pear_rcorr_object$P
testRes = cor.mtest(avg_counts_pear, conf.level = 0.95)


corrplot(pear_matrix, order = 'hclust', addrect = 17, method="shade", rect.col = 'black', col = rep(rev(brewer.pal(n=8, name="RdYlBu")),2), col.lim=c(0,1), tl.cex = 1)

Pearson correlation heatmap (collapsed by samples) 8 clusters (because 8 unique dev stages)

library(corrplot)
library(Hmisc)
avg_counts_pear <- avg_counts
colnames(avg_counts_pear) <- sapply(colnames(avg_counts), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
pear_rcorr_object <- rcorr(avg_counts_pear, type = "pearson")
pear_matrix <- pear_rcorr_object$r
pear_matrix_pvalues <- pear_rcorr_object$P
testRes = cor.mtest(avg_counts_pear, conf.level = 0.95)


corrplot(pear_matrix, order = 'hclust', addrect = 8, method="shade", rect.col = 'black', col = rep(rev(brewer.pal(n=8, name="RdYlBu")),2), col.lim=c(0,1))

Prepare Limma (with individual comparison)

# Demonstration of using limma to find differentially expressed genes

library(limma)
library(tidyverse)
library(edgeR)
library(tximport)

# load data
pc <- read.delim("Data/pc_sub.tsv", stringsAsFactors = FALSE)
files <- paste0("Data/Count_tables/", meta$id, ".tsv")
counts <- tximport(files, type = "rsem", txIn = FALSE)
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
## 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
## 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
## 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
## 149 150 151 152 153 154 155 156
count_mat <- counts$counts

# name matrix and only keep protein coding genes
rownames(count_mat) <- str_replace(rownames(count_mat), "\\.[:digit:]*$", "")
colnames(count_mat) <- meta$id
count_mat <- count_mat[rownames(count_mat) %in% pc$Gene_ID, ]
oldrownames <- data.frame(rownames(count_mat)) 
oldrownames[,"Gene_ID"] <- oldrownames[,1]
rownames(count_mat) <- oldrownames %>% left_join(pc,"Gene_ID") %>% pull(Symbol)

#count_mat <- count_mat[rownames(count_mat) %in% (TFs %>% pull(Symbol)),]

# order by tissue and dev stage
meta <- meta %>% group_by(tissue_type) %>% arrange(dev_stage, .by_group = TRUE)
count_mat <- count_mat[, meta$id]
stopifnot(identical(colnames(count_mat), meta$id))

# build design matrix
tissue <- factor(meta$tissue_type)
design <- model.matrix(~ 0 + tissue)
colnames(design) <- gsub(' ', '_', colnames(design))
#design <- model.matrix(~ meta$tissue_type * meta$dev_stage)

# edgeR: remove lowly expressed, scale normalize, and the log2 CPM transform
dge <- DGEList(count_mat)
keep <- filterByExpr(dge, design)
dge <- dge[keep, ]

# gene counts of removed
removed <- rowSums(count_mat[!keep,])
summary(removed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    1.00    5.00   24.36   22.41  851.45
count_mat[which.max(removed), ]
## ENCFF465YOS ENCFF516EUX ENCFF929KWG ENCFF672DDJ ENCFF772UWT ENCFF262TXH 
##         217         148          29          44          27          22 
## ENCFF146HIO ENCFF129XUH ENCFF132NQU ENCFF867TKM ENCFF924CMS ENCFF370UDF 
##          30          33          19          17          25          45 
## ENCFF369TLJ ENCFF536XKZ ENCFF302TQO ENCFF413BXV ENCFF465SNB ENCFF976OLT 
##          24          33          19          12          17          17 
## ENCFF393ZAF ENCFF488HRD ENCFF567AFL ENCFF227HKF ENCFF745ZJF ENCFF816CVP 
##          16          21          16          13          32          14 
## ENCFF763GXJ ENCFF340XFQ ENCFF590FAC ENCFF484AOO ENCFF918QNL ENCFF895JXR 
##          23          23          10          17          34          24 
## ENCFF127FPD ENCFF721XZC ENCFF226IWR ENCFF540EJL ENCFF125TAY ENCFF824DCQ 
##          79          58          81          72          44          38 
## ENCFF242GMD ENCFF976CYB ENCFF111IGW ENCFF540BJT ENCFF440PWB ENCFF219PVC 
##          81          55          60          63          85          48 
## ENCFF415JBI ENCFF871IGQ ENCFF817KPY ENCFF155GNG ENCFF070GIC ENCFF097IDM 
##          50          58          60          41          36          42 
## ENCFF750FTK ENCFF109HTF ENCFF184ZAV ENCFF684GSP ENCFF131WIM ENCFF604LWF 
##          74          53          39          32          26          31 
## ENCFF206KRT ENCFF741FZD ENCFF395LAH ENCFF338ZXD ENCFF211ELX ENCFF830YBR 
##          43          18          31          30          34          44 
## ENCFF798MSP ENCFF861GUP ENCFF959OHE ENCFF228SAS ENCFF052THP ENCFF114YCL 
##          32          46          16          28          17          35 
## ENCFF443JRH ENCFF278PAQ ENCFF485CJB ENCFF795XBQ ENCFF499WRT ENCFF413OJO 
##          23          36          21          21         120         128 
## ENCFF700YRC ENCFF347HOK ENCFF752QKG ENCFF143OJZ ENCFF905HUL ENCFF783LVC 
##         122         129         120         162         257         326 
## ENCFF399HJB ENCFF597VHC ENCFF195JHC ENCFF457ZGF ENCFF982ERQ ENCFF449QSP 
##          22          51          34          32          20          27 
## ENCFF358WYS ENCFF634AUL ENCFF677BPV ENCFF794QMH ENCFF532ZDE ENCFF003DBZ 
##          30          22          28          15          29          15 
## ENCFF954EHG ENCFF523MEO ENCFF669WDQ ENCFF997HBI ENCFF615ZTQ ENCFF336VTP 
##          38          28          26          20          20          22 
## ENCFF432ZGG ENCFF572OPZ ENCFF740SXP ENCFF504YJB ENCFF759PUL ENCFF512KYX 
##           5           8           8           7          11           4 
## ENCFF875HIA ENCFF143HKK ENCFF872ABP ENCFF226ILJ ENCFF718PJC ENCFF996EMC 
##          10          20          50          48          38          43 
## ENCFF538RNR ENCFF365SND ENCFF990GIB ENCFF658LQM ENCFF996YFF ENCFF421TRR 
##          54          40          70          63          23          37 
## ENCFF359ZOA ENCFF971KZC ENCFF168CVY ENCFF390KQM ENCFF422BJI ENCFF196WAD 
##          57          59          15          20          24          31 
## ENCFF453GZG ENCFF870YWY ENCFF706XGJ ENCFF835FSF ENCFF918YFP ENCFF052VJO 
##          16          19          24          33          42          23 
## ENCFF210MWH ENCFF793WMU ENCFF576YVC ENCFF480GNL ENCFF375JDR ENCFF298WHK 
##          45          35          23          25           8          24 
## ENCFF532DDR ENCFF627COG ENCFF502BTV ENCFF049EIV ENCFF513HAL ENCFF967SJG 
##          11          20          20           9          10           8 
## ENCFF037GWJ ENCFF365DLM ENCFF402XKL ENCFF918XYP ENCFF600ZQX ENCFF871WCS 
##          25          29          20          31          22          35 
## ENCFF050PAT ENCFF691EQW ENCFF972NMO ENCFF355MOU ENCFF288JNN ENCFF052DOQ 
##          26          22          35          40          59          23 
## ENCFF517XLT ENCFF434XNZ ENCFF356QFG ENCFF319WZT ENCFF385MJV ENCFF923YGS 
##          41          46          15           9          25          16
# scale normalization using TMM method (default)
dge <- calcNormFactors(dge)

# convert counts to log2CPM 
cpm_counts <- cpm(dge, log = TRUE, prior.count = 3)

# fit model with limma eBayes trend 
fit <- lmFit(cpm_counts, design)
fit <- eBayes(fit, trend=TRUE)

Get all contrasts (one tissue vs all for every tissue)

## These are one way to get contrasts by creating new matrix where -1 what we want to subtract and 1 what we want to keep
#   contrast.matrix <- matrix(data=-1, nrow=length(tissue_types), ncol = length(tissue_types))
# for(i in 1:length(tissue_types)){
#   contrast.matrix[i,i] <- 1
# }
# colnames(contrast.matrix) <- tissue_types
# 
# fit2 <- contrasts.fit(fit, contrasts = contrast.matrix)
# fit2 <- eBayes(fit2)


### Get all contrasts by accounting each tissue type as 1/16
all_contrast_strings <- c()
for (name in colnames(design)){
  all_temp <- paste(colnames(design)[colnames(design) != name], collapse = "+")
  all_temp <- paste0("(", all_temp, ")/", length(colnames(design)) - 1)
  full_string <- paste0(name, "-", all_temp)
  all_contrast_strings <- c(all_contrast_strings, full_string) 
}
con <- makeContrasts(contrasts = all_contrast_strings, levels=design)
colnames(con) <- colnames(design)
fit3 <- contrasts.fit(fit, contrasts=con)
fit3 <- eBayes(fit3, trend=TRUE)

Extract all toptables into list

diff_list_non_sig <- list() 
diff_list <- list()
for(tissue in colnames(design)){
  #diff_list[tissue] <- list(topTable(fit3,coef = tissue, number = Inf, p.value=0.05, sort.by = "p", resort.by = "logFC"))
  #diff_list_non_sig[tissue] <- list(topTable(fit3,coef = tissue, number = Inf, sort.by = "p", resort.by="logFC"))
    diff_list[tissue] <- list(topTable(fit3,coef = tissue, number = Inf, p.value=0.05, sort.by = "p"))
  diff_list_non_sig[tissue] <- list(topTable(fit3,coef = tissue, number = Inf, sort.by = "p"))
}

for (tissue in names(diff_list)){
diff_list[[tissue]] <- diff_list[[tissue]][rownames(diff_list[[tissue]]) %in% TFs_sym,]
}
diff_list.FC.Cutoff <- diff_list
for (tissue in names(diff_list)){
diff_list.FC.Cutoff[[tissue]] <- diff_list[[tissue]][diff_list[[tissue]]$logFC >= 1.5,]
}

gene_list <- c()
global_gene_list <-c()
gene_list_with_names <- list()
global_gene_list_with_names <- list()
top_n <- 2
avg_counts_collapse_tissue <- matrix(data=NA, nrow=nrow(all_counts_all), ncol=length(unique(meta$tissue_type)))
colnames(avg_counts_collapse_tissue) <- unique(meta$tissue_type)
rownames(avg_counts_collapse_tissue) <- rownames(all_counts_all)
walker <- 1
for (tissue in unique(meta$tissue_type)){
  avg_counts_collapse_tissue[,walker] <- apply(all_counts_all[,meta %>% filter(tissue_type == tissue) %>% pull(id)],1, mean)
  walker <- 1 + walker
}
for (column in colnames(design)){
  gene_list_with_names[column] <- list(diff_list.FC.Cutoff[[column]][1:top_n,] %>% rownames())
  global_gene_list_with_names[column] <- list(diff_list[[column]] %>% rownames())
}
for (column in colnames(design)){
  gene_list <- c(gene_list, diff_list.FC.Cutoff[[column]][1:top_n,] %>% rownames())
  global_gene_list <- c(gene_list, diff_list[[column]] %>% rownames())
}

Using 0.05 as cutoff, the numbers of differential expressed genes per tissue

get_sig_diff <- function(x){
  return(x = length(x$adj.P.Val))
}
sig_lengths <- sapply(diff_list, get_sig_diff, USE.NAMES = TRUE)
sig_lengths <- data.frame(sig_lengths)
base::rownames(sig_lengths) <- sub("tissue", "", base::rownames(sig_lengths))
sig_lengths <- sig_lengths %>% rownames_to_column()
ggplot(data=sig_lengths, aes(x=rowname, y=sig_lengths)) + geom_bar(stat='identity') +theme_cowplot(20) + theme(axis.text.x = element_text(size = 20, angle = 45, hjust = 1)) +xlab("tissue type") 

#sig_lengths_temp <- sig_lengths

see if there are overlaps in the top 50 genes in the brains

t.forebrain <- topTable(fit3, coef = "tissueforebrain", number=50, sort.by = "logFC", resort.by = "P", p.value=0.05)
t.midbrain <- topTable(fit3, coef = "tissuemidbrain", number=50, sort.by = "logFC", resort.by = "P", p.value=0.05)
t.hindbrain <- topTable(fit3, coef = "tissuehindbrain", number=50, sort.by = "logFC", resort.by = "P", p.value=0.05)
tables <- list(t.forebrain, t.midbrain, t.hindbrain)
table.sets <- list()
walker <- 1
for (table in tables){
  table.sets[[walker]] <- data.frame(table) %>% rownames_to_column() %>% pull("rowname")
  walker <- 1 + walker
}
intersects__ <- base::intersect(table.sets[[1]], table.sets[[2]])
intersects__ <- base::intersect(intersects__, table.sets[[3]])
print(paste0("Intersected genes are : ", intersects__))
##  [1] "Intersected genes are : Zic4"    "Intersected genes are : Otx2"   
##  [3] "Intersected genes are : Zic3"    "Intersected genes are : Zic1"   
##  [5] "Intersected genes are : Pou3f2"  "Intersected genes are : Gad2"   
##  [7] "Intersected genes are : Elavl3"  "Intersected genes are : Ncan"   
##  [9] "Intersected genes are : Nhlh2"   "Intersected genes are : Hes5"   
## [11] "Intersected genes are : Cntn2"   "Intersected genes are : Tubb3"  
## [13] "Intersected genes are : Slc17a6" "Intersected genes are : Slc6a1" 
## [15] "Intersected genes are : Stmn3"   "Intersected genes are : Sez6"   
## [17] "Intersected genes are : Myt1l"   "Intersected genes are : Fabp4"
  scale = "row", cexRow=1,cexCol=1,margins=c(12,8),srtCol=45)

Heatmap of the top 5 genes per tissue (expressoin data)

gene_anno <- data.frame(matrix(data=NA, nrow=length(unique(gene_list)),ncol=17), row.names=unique(gene_list))
colnames(gene_anno) <- colnames(design)
for (col in colnames(design)){
  for (i in 1:2){
      gene_anno[gene_list_with_names[[col]][i], col] <- col
  }
  #   for (i in 1:length(gene_list_with_names[[col]])){
  #     gene_anno[gene_list_with_names[[col]][i], col] <- col
  # }

}
library(pheatmap)
library(RColorBrewer)
colnames(gene_anno) <- sub("tissue", "", colnames(gene_anno))
breaksList = seq(-2, 10, by = 1)
pheatmap(avg_counts_collapse_tissue[unique(gene_list),], annotation_row =rev(gene_anno), treeheight_row = 0, treeheight_col = 0, cluster_rows = F, scale="row", cluster_cols = F, annotation_legend = F, fontsize_row=20,fontsize_col=20) 

Heatmap of the top 5 genes per tissue (by logFC)

heatmaplogFC <- matrix(data=NA, nrow=length(unique(gene_list)), ncol=17)
colnames(heatmaplogFC) <- colnames(design)
rownames(heatmaplogFC) <- unique(gene_list)

for (col in colnames(heatmaplogFC)){
  for (gene in unique(gene_list)){
      heatmaplogFC[gene,col] <- diff_list[[col]][gene,"logFC"]
  }

}
library(pheatmap)
colnames(heatmaplogFC) <- sub("tissue", "", colnames(heatmaplogFC))
colnames(gene_anno) <- sub("tissue", "", colnames(gene_anno))
pheatmap(heatmaplogFC, annotation_row =rev(gene_anno), treeheight_row = 0, treeheight_col = 0, cluster_rows = F, cluster_cols = F, annotation_legend = F)

Top Genes view of DE heatmap

gene_list <- c()
global_gene_list <-c()
gene_list_with_names <- list()
global_gene_list_with_names <- list()
top_n <- 100
avg_counts_collapse_tissue <- matrix(data=NA, nrow=nrow(all_counts_all), ncol=length(unique(meta$tissue_type)))
colnames(avg_counts_collapse_tissue) <- unique(meta$tissue_type)
rownames(avg_counts_collapse_tissue) <- rownames(all_counts_all)
walker <- 1
for (tissue in unique(meta$tissue_type)){
  avg_counts_collapse_tissue[,walker] <- apply(all_counts_all[,meta %>% filter(tissue_type == tissue) %>% pull(id)],1, mean)
  walker <- 1 + walker
}
for (column in colnames(design)){
  gene_list_with_names[column] <- list(diff_list.FC.Cutoff[[column]][1:min(top_n, nrow(diff_list.FC.Cutoff[[column]])),] %>% rownames())
  global_gene_list_with_names[column] <- list(diff_list[[column]] %>% rownames())
}
for (column in colnames(design)){
  gene_list <- c(gene_list, diff_list.FC.Cutoff[[column]][1:min(top_n, nrow(diff_list.FC.Cutoff[[column]])),] %>% rownames())
  global_gene_list <- c(gene_list, diff_list[[column]] %>% rownames())
}



gene_anno <- data.frame(matrix(data=NA, nrow=length(unique(gene_list)),ncol=17), row.names=unique(gene_list))
colnames(gene_anno) <- colnames(design)
for (col in colnames(design)){
    for (i in 1:length(gene_list_with_names[[col]])){
      gene_anno[gene_list_with_names[[col]][i], col] <- col
  }

}
library(pheatmap)
colnames(gene_anno) <- sub("tissue", "", colnames(gene_anno))
pheatmap(avg_counts_collapse_tissue[unique(gene_list),], annotation_row =rev(gene_anno), treeheight_row = 0, treeheight_col = 0, cluster_rows = F, scale="row", cluster_cols = F, annotation_legend = F, show_rownames = FALSE,fontsize_col=20)

Global view of DE heatmap

gene_anno <- data.frame(matrix(data=NA, nrow=length(unique(global_gene_list)),ncol=17), row.names=unique(global_gene_list))
colnames(gene_anno) <- colnames(design)
for (col in colnames(design)){
    for (i in 1:length(global_gene_list_with_names[[col]])){
      gene_anno[global_gene_list_with_names[[col]][i], col] <- col
  }

}
library(pheatmap)
colnames(gene_anno) <- sub("tissue", "", colnames(gene_anno))
pheatmap(avg_counts_collapse_tissue[unique(global_gene_list),], annotation_row =rev(gene_anno), treeheight_row = 0, treeheight_col = 0, cluster_rows = F, scale="row", cluster_cols = F, annotation_legend = F, show_rownames = FALSE,fontsize_col=20)

Another way of graphing (average samples using only the top 50 genes)

top_n <- 50
gene_list <- c()
for (i in 1:17){
  #gene_list <- c(gene_list, topTable(fit3, coef = i, number=top_n, sort.by = "logFC", resort.by = "P", p.value=0.05) %>% rownames())
  gene_list <- c(gene_list, (diff_list[[i]] %>% rownames())[i:top_n])
}

avg_counts_pear <- avg_counts_all[gene_list,]
colnames(avg_counts_pear) <- sapply(colnames(avg_counts_all), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
pear_rcorr_object <- rcorr(avg_counts_pear, type = "pearson")
pear_matrix <- pear_rcorr_object$r
pear_matrix_pvalues <- pear_rcorr_object$P
testRes = cor.mtest(avg_counts_pear, conf.level = 0.95)
corrplot(pear_matrix, method="shade", type="lower", diag=FALSE, col = rep(rev(brewer.pal(n=8, name="RdYlBu")),2))

Another way of graphing (Collapsed by time series and using only the top 50 genes)

pear_rcorr_object <- rcorr(avg_counts_collapse_tissue[gene_list,], type = "pearson")
pear_matrix <- pear_rcorr_object$r
pear_matrix_pvalues <- pear_rcorr_object$P
corrplot(pear_matrix, method="shade", type="lower", diag=FALSE, col = rep(rev(brewer.pal(n=8, name="RdYlBu")),2), order="hclust")

Top 100 expressed genes visuallized by set

top_n <- 100
library(UpSetR)
## 
## Attaching package: 'UpSetR'
## The following object is masked from 'package:lattice':
## 
##     histogram
top_diff_list <- list() 
for(tissue in colnames(design)){
  #top_diff_list[tissue] <- list(topTable(fit3,coef = tissue, number = 100,p.value=0.05) %>% rownames())
  top_diff_list[tissue] <- list(diff_list[[tissue]][1:top_n,] %>% rownames())
}
names(top_diff_list) <- gsub("tissue", "", names(top_diff_list))
set_colors <- c("gray23","gray23","maroon","gray23","maroon","gray23","gray23","gray23","gray23","gray23","maroon","gray23","maroon","gray23","gray23","gray23","gray23")


upset(fromList(top_diff_list), nsets=17, nintersects=50, order.by = "freq", decreasing = TRUE, sets.bar.color=set_colors)

Top 100 expressed genes in brain visuallized by set

upset(fromList(top_diff_list[grepl("brain", names(top_diff_list))]), nsets=3, nintersects=60, order.by = "freq", decreasing = TRUE)

Attempting the same thing but with complexupsetr

top_n <- 100
library(UpSetR)
top_diff_list <- list() 
for(tissue in colnames(design)){
  #top_diff_list[tissue] <- list(topTable(fit3,coef = tissue, number = 100,p.value=0.05) %>% rownames())
  top_diff_list[tissue] <- list(diff_list.FC.Cutoff[[tissue]][1:min(top_n, nrow(diff_list.FC.Cutoff[[tissue]])),] %>% rownames())
}
names(top_diff_list) <- gsub("tissue", "", names(top_diff_list))
neural_parts <- c('hindbrain', "neural_tube", "forebrain", "midbrain")
all_unique_top <- unique(unlist(top_diff_list))
complex_up_frame <- data.frame(matrix(data=FALSE, nrow=length(all_unique_top), ncol=17))
rownames(complex_up_frame) <- all_unique_top
colnames(complex_up_frame) <- names(top_diff_list)
for (gene in rownames(complex_up_frame)){
  for(tissue in names(top_diff_list)){
    if(gene %in% top_diff_list[[tissue]]){
      complex_up_frame[gene,tissue] <- TRUE
    }
  }
}
gene_metadata = data.frame(
  set=names(top_diff_list),
    brain_or_not= ifelse((grepl("brain", names(top_diff_list)) | grepl("neural", names(top_diff_list))),"Neural","Non-neural")
)
ComplexUpset::upset(complex_up_frame, names(top_diff_list), min_size=3, set_sizes = FALSE,stripes= ComplexUpset::upset_stripes(
        mapping=aes(color=brain_or_not),
        colors=c(
            'Neural'='coral',
            'Non-neural'='grey61'
        ),
        data=gene_metadata
    ), queries=list(
        ComplexUpset::upset_query(
            intersect=neural_parts,
            color='maroon',
            fill='maroon',
        )
    )
)

what are the six elements?

neuro_tf_list <- rownames(complex_up_frame)[apply(!(complex_up_frame %>% dplyr::select(-midbrain, -hindbrain,-forebrain, -neural_tube)), 1,all) & apply(complex_up_frame %>% dplyr::select(midbrain, hindbrain,forebrain, neural_tube), 1,all)]
rownames(complex_up_frame)[apply(!(complex_up_frame %>% dplyr::select(-midbrain, -hindbrain,-forebrain, -neural_tube)), 1,all) & apply(complex_up_frame %>% dplyr::select(midbrain, hindbrain,forebrain, neural_tube), 1,all)]
##  [1] "Pou3f2" "Barhl2" "Sox1"   "Scrt2"  "Hes5"   "Dmrtb1" "Lhx5"   "Scrt1" 
##  [9] "Rfx4"   "Pbx4"   "Zfp2"   "Otp"    "Csrnp3" "Npas3"  "Gsx1"   "Zfp811"
## [17] "Pou6f2" "Myt1l"  "Meis3"

Graph the expression distribution of these genes in each neural part

TFCounts <- data.frame(all_counts) %>% rownames_to_column("rowname")
TFCounts <- TFCounts %>% gather("id", "counts", -rowname)
TFCounts <- TFCounts %>% left_join((meta %>% dplyr::select(tissue_type, dev_stage, id)), by="id")
TFCounts.neuralOnly <- TFCounts %>% filter(tissue_type %in% c('hindbrain', "neural tube", "forebrain", "midbrain") & rowname %in% neuro_tf_list)

ggplot(data=TFCounts.neuralOnly, aes(y=counts, x=tissue_type)) + geom_boxplot()+ facet_wrap(~ rowname, scales = "free")  + theme_cowplot() + theme(axis.text.x = element_text(size = 20, angle = 45, hjust = 1))

Expression time series per gene in only neural tissues

TFCounts.avg <- data.frame(avg_counts) %>% rownames_to_column("rowname")
TFCounts.avg <- TFCounts.avg %>% gather("id", "counts", -rowname)
TFCounts.avg <- TFCounts.avg %>% left_join((meta %>% select(tissue_type, dev_stage, id)), by="id")
TFCounts.neuralOnly_collapsed <- TFCounts.avg %>% filter(tissue_type %in% c('hindbrain', "neural tube", "forebrain", "midbrain") & rowname %in% neuro_tf_list)

ggplot(data=TFCounts.neuralOnly_collapsed, aes(y=log2(counts+1), x=dev_stage, group=tissue_type)) + geom_line(aes(colour=tissue_type)) + geom_point() + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ rowname, scales = "free")  + theme_cowplot(20) + theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1))

Expression time series per gene in all tissues

ggplot(data=TFCounts.avg %>% filter(rowname %in% neuro_tf_list), aes(y=log2(counts+1), x=dev_stage, group=tissue_type)) + geom_line(aes(colour=tissue_type)) + geom_point() + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ rowname, scales = "free")  + theme_cowplot(20) + theme(axis.text.x = element_text(size = 15, angle = 45, hjust = 1))

library(EnhancedVolcano)
EnhancedVolcano(topTable(fit3,coef="tissuehindbrain",number=Inf), x="logFC", y="adj.P.Val", lab=rownames(topTable(fit3,coef="tissuehindbrain",number=Inf)), pCutoff = 0.05, FCcutoff=0, labSize = 6.0, legendLabSize = 20) +theme(text=element_text(20))

library(EnhancedVolcano)
EnhancedVolcano(topTable(fit3,coef="tissuehindbrain",number=Inf)[TFs_sym,], x="logFC", y="adj.P.Val", lab=rownames(topTable(fit3,coef="tissuehindbrain",number=Inf)[TFs_sym,]), pCutoff = 0.05, FCcutoff=0, labSize = 6.0, legendLabSize = 20) +theme(text=element_text(20))

library(EnhancedVolcano)
EnhancedVolcano(topTable(fit3,coef="tissuestomach",number=Inf)[TFs_sym,], x="logFC", y="adj.P.Val", lab=rownames(topTable(fit3,coef="tissuestomach",number=Inf)[TFs_sym,]), pCutoff = 0.05, FCcutoff=1.5)

sig<- c()
non_sig <- c()
for (tissue in names(diff_list)){
    if(topTable(fit3,coef=tissue, number=Inf)["Tcf15","adj.P.Val"] <= 0.05){
    sig <- c(sig, tissue)
  }else{
    non_sig <- c(non_sig, tissue)
   }
}
sig <- sub("tissue", "", sig)
non_sig <- sub("tissue", "", non_sig)
neuro_tf_list <- rownames(complex_up_frame)[apply(!(complex_up_frame %>% select(-adrenal_gland)), 1,all) & apply(complex_up_frame %>% select(adrenal_gland), 1,all)]
rownames(complex_up_frame)[apply(!complex_up_frame, 1, all)]
## character(0)
TFCounts.avg <- data.frame(avg_counts) %>% rownames_to_column("rowname")
TFCounts.avg <- TFCounts.avg %>% gather("id", "counts", -rowname)
TFCounts.avg <- TFCounts.avg %>% left_join((meta %>% select(tissue_type, dev_stage, id)), by="id")
TFCounts.avg_in_vs_out <- TFCounts.avg %>% mutate(significant=ifelse(tissue_type%in%sig, "yes", "no"))
ggplot(data=TFCounts.avg_in_vs_out %>% filter(rowname=="Tcf15"), aes(x=significant, y=counts)) + geom_boxplot()

have top 10 diff expressed per tissue, point to hindbrain genes interesting Inter tissue vs Intra tissue gene patterns across timme series Histogram of how many differentially expressed per tissue Heatmap shows top 10 adjusted p-value and show their logFC only keep upregulated See the variance across the time series (find more variable and find paper)